{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "colab_type": "text",
        "id": "view-in-github"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/sugarforever/LangChain-Tutorials/blob/main/LangChain_Extraction.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 71,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "qfR8Z2HZPc7E",
        "outputId": "e6947c8a-8fac-44fa-f4ce-c1cc5250fcd0"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: langchain in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (0.0.139)\n",
            "Requirement already satisfied: numpy<2,>=1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain) (1.23.5)\n",
            "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain) (0.5.7)\n",
            "Requirement already satisfied: pydantic<2,>=1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain) (1.10.7)\n",
            "Requirement already satisfied: gptcache>=0.1.7 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain) (0.1.10)\n",
            "Requirement already satisfied: PyYAML>=5.4.1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain) (5.4.1)\n",
            "Requirement already satisfied: requests<3,>=2 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain) (2.28.2)\n",
            "Requirement already satisfied: SQLAlchemy<2,>=1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain) (1.4.47)\n",
            "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain) (3.8.4)\n",
            "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain) (4.0.2)\n",
            "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain) (8.2.2)\n",
            "Requirement already satisfied: openapi-schema-pydantic<2.0,>=1.2 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain) (1.2.4)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.3)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.8.2)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.0.4)\n",
            "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (3.1.0)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.1)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (22.2.0)\n",
            "Requirement already satisfied: typing-inspect>=0.4.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain) (0.8.0)\n",
            "Requirement already satisfied: marshmallow<4.0.0,>=3.3.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain) (3.19.0)\n",
            "Requirement already satisfied: marshmallow-enum<2.0.0,>=1.5.1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain) (1.5.1)\n",
            "Requirement already satisfied: openai in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from gptcache>=0.1.7->langchain) (0.27.4)\n",
            "Requirement already satisfied: cachetools in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from gptcache>=0.1.7->langchain) (5.3.0)\n",
            "Requirement already satisfied: typing-extensions>=4.2.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from pydantic<2,>=1->langchain) (4.5.0)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from requests<3,>=2->langchain) (2022.12.7)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from requests<3,>=2->langchain) (1.26.15)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from requests<3,>=2->langchain) (3.4)\n",
            "Requirement already satisfied: greenlet!=0.4.17 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from SQLAlchemy<2,>=1->langchain) (2.0.2)\n",
            "Requirement already satisfied: packaging>=17.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from marshmallow<4.0.0,>=3.3.0->dataclasses-json<0.6.0,>=0.5.7->langchain) (23.0)\n",
            "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from typing-inspect>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain) (1.0.0)\n",
            "Requirement already satisfied: tqdm in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from openai->gptcache>=0.1.7->langchain) (4.65.0)\n",
            "Note: you may need to restart the kernel to use updated packages.\n",
            "Requirement already satisfied: kor in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (0.8.0)\n",
            "Requirement already satisfied: pandas<2.0.0,>=1.5.3 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from kor) (1.5.3)\n",
            "Requirement already satisfied: langchain>=0.0.110 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from kor) (0.0.139)\n",
            "Requirement already satisfied: openai<0.28,>=0.27 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from kor) (0.27.4)\n",
            "Requirement already satisfied: numpy<2,>=1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain>=0.0.110->kor) (1.23.5)\n",
            "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain>=0.0.110->kor) (3.8.4)\n",
            "Requirement already satisfied: requests<3,>=2 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain>=0.0.110->kor) (2.28.2)\n",
            "Requirement already satisfied: PyYAML>=5.4.1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain>=0.0.110->kor) (5.4.1)\n",
            "Requirement already satisfied: async-timeout<5.0.0,>=4.0.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain>=0.0.110->kor) (4.0.2)\n",
            "Requirement already satisfied: openapi-schema-pydantic<2.0,>=1.2 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain>=0.0.110->kor) (1.2.4)\n",
            "Requirement already satisfied: SQLAlchemy<2,>=1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain>=0.0.110->kor) (1.4.47)\n",
            "Requirement already satisfied: gptcache>=0.1.7 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain>=0.0.110->kor) (0.1.10)\n",
            "Requirement already satisfied: dataclasses-json<0.6.0,>=0.5.7 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain>=0.0.110->kor) (0.5.7)\n",
            "Requirement already satisfied: tenacity<9.0.0,>=8.1.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain>=0.0.110->kor) (8.2.2)\n",
            "Requirement already satisfied: pydantic<2,>=1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from langchain>=0.0.110->kor) (1.10.7)\n",
            "Requirement already satisfied: tqdm in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from openai<0.28,>=0.27->kor) (4.65.0)\n",
            "Requirement already satisfied: pytz>=2020.1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from pandas<2.0.0,>=1.5.3->kor) (2023.2)\n",
            "Requirement already satisfied: python-dateutil>=2.8.1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from pandas<2.0.0,>=1.5.3->kor) (2.8.2)\n",
            "Requirement already satisfied: attrs>=17.3.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (22.2.0)\n",
            "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (6.0.4)\n",
            "Requirement already satisfied: aiosignal>=1.1.2 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (1.3.1)\n",
            "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (1.8.2)\n",
            "Requirement already satisfied: frozenlist>=1.1.1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (1.3.3)\n",
            "Requirement already satisfied: charset-normalizer<4.0,>=2.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain>=0.0.110->kor) (3.1.0)\n",
            "Requirement already satisfied: marshmallow<4.0.0,>=3.3.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain>=0.0.110->kor) (3.19.0)\n",
            "Requirement already satisfied: marshmallow-enum<2.0.0,>=1.5.1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain>=0.0.110->kor) (1.5.1)\n",
            "Requirement already satisfied: typing-inspect>=0.4.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from dataclasses-json<0.6.0,>=0.5.7->langchain>=0.0.110->kor) (0.8.0)\n",
            "Requirement already satisfied: cachetools in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from gptcache>=0.1.7->langchain>=0.0.110->kor) (5.3.0)\n",
            "Requirement already satisfied: typing-extensions>=4.2.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from pydantic<2,>=1->langchain>=0.0.110->kor) (4.5.0)\n",
            "Requirement already satisfied: six>=1.5 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from python-dateutil>=2.8.1->pandas<2.0.0,>=1.5.3->kor) (1.16.0)\n",
            "Requirement already satisfied: idna<4,>=2.5 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from requests<3,>=2->langchain>=0.0.110->kor) (3.4)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from requests<3,>=2->langchain>=0.0.110->kor) (2022.12.7)\n",
            "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from requests<3,>=2->langchain>=0.0.110->kor) (1.26.15)\n",
            "Requirement already satisfied: greenlet!=0.4.17 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from SQLAlchemy<2,>=1->langchain>=0.0.110->kor) (2.0.2)\n",
            "Requirement already satisfied: packaging>=17.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from marshmallow<4.0.0,>=3.3.0->dataclasses-json<0.6.0,>=0.5.7->langchain>=0.0.110->kor) (23.0)\n",
            "Requirement already satisfied: mypy-extensions>=0.3.0 in /Users/wyang14/.pyenv/versions/3.9.16/lib/python3.9/site-packages (from typing-inspect>=0.4.0->dataclasses-json<0.6.0,>=0.5.7->langchain>=0.0.110->kor) (1.0.0)\n",
            "Note: you may need to restart the kernel to use updated packages.\n"
          ]
        }
      ],
      "source": [
        "%pip install langchain\n",
        "%pip install kor"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 76,
      "metadata": {
        "id": "AGmcGwiiP_e_"
      },
      "outputs": [],
      "source": [
        "from langchain.chat_models import ChatOpenAI\n",
        "from langchain.llms import OpenAI\n",
        "\n",
        "from kor.extraction import create_extraction_chain\n",
        "from kor.nodes import Object, Text, Number"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 77,
      "metadata": {
        "id": "sH0lREGXR57O"
      },
      "outputs": [],
      "source": [
        "import json\n",
        "\n",
        "def json_dump(json_object):\n",
        "  json_formatted_str = json.dumps(json_object, indent=2, ensure_ascii=False)\n",
        "  print(json_formatted_str)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 78,
      "metadata": {
        "id": "vOeS_iMDQGK2"
      },
      "outputs": [],
      "source": [
        "import os\n",
        "\n",
        "OPENAI_API_KEY = os.environ['OPENAI_API_KEY']"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 79,
      "metadata": {
        "id": "OHNQODxSP7ya"
      },
      "outputs": [],
      "source": [
        "llm = ChatOpenAI(\n",
        "    model_name=\"gpt-3.5-turbo\",\n",
        "    temperature=0,\n",
        "    openai_api_key=OPENAI_API_KEY\n",
        ")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 80,
      "metadata": {
        "id": "4ahT3U-GQW2i"
      },
      "outputs": [],
      "source": [
        "club_schema = Object(\n",
        "    id=\"club\",\n",
        "    description=\"Information about a club\",\n",
        "    \n",
        "    attributes=[\n",
        "        Text(\n",
        "            id=\"name\",\n",
        "            description=\"The name of the club.\"\n",
        "        ),\n",
        "        Text(\n",
        "            id=\"manager\",\n",
        "            description=\"The name of the manager.\"\n",
        "        ),\n",
        "        Number(\n",
        "            id=\"ranking\",\n",
        "            description=\"The ranking of the club.\"\n",
        "        )\n",
        "    ],\n",
        "    examples=[\n",
        "        (\n",
        "            \"Napoli managed by Spalletti are the 1st, Juventus managed by Allegri are the 5th.\",\n",
        "            [\n",
        "                {\"name\": \"Napoli\", \"manager\": \"Spalletti\", \"ranking\": 1},\n",
        "                {\"name\": \"Juventus\", \"manager\": \"Allegri\", \"ranking\": 5},\n",
        "            ],\n",
        "        )\n",
        "    ]\n",
        ")\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 81,
      "metadata": {
        "id": "jUrxHjVuRCYd"
      },
      "outputs": [],
      "source": [
        "extraction_chain = create_extraction_chain(llm, club_schema)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 82,
      "metadata": {
        "id": "X8K9_Bb6RJ0V"
      },
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "{\n",
            "  \"club\": [\n",
            "    {\n",
            "      \"name\": \"Arsenal\",\n",
            "      \"manager\": \"Arteta\",\n",
            "      \"ranking\": \"1\"\n",
            "    },\n",
            "    {\n",
            "      \"name\": \"Manchester City\",\n",
            "      \"manager\": \"Pep\",\n",
            "      \"ranking\": \"2\"\n",
            "    }\n",
            "  ]\n",
            "}\n"
          ]
        }
      ],
      "source": [
        "\n",
        "text='''\n",
        "    Manchester City are just 1 point after Arsenal. \n",
        "    Arsenal are on top of the table.\n",
        "    The 2 managers Pep and Arteta are going to meet face to face in this weekend.\n",
        "'''\n",
        "output = extraction_chain.predict_and_parse(text=text)['data']\n",
        "\n",
        "json_dump(output)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 83,
      "metadata": {},
      "outputs": [],
      "source": [
        "stock_market_schema = Object(\n",
        "    id=\"stock_market\",\n",
        "    description=\"A股市场收评，上涨板块综述\",\n",
        "    \n",
        "    # Notice I put multiple fields to pull out different attributes\n",
        "    attributes=[\n",
        "        Text(\n",
        "            id=\"industry_name\",\n",
        "            description=\"行业板块名称\"\n",
        "        )\n",
        "    ],\n",
        "    examples=[\n",
        "        (\n",
        "            '''\n",
        "                周二市场呈现出震荡调整格局，盘中行业板块及题材呈现局部轮动和分化，保险、煤炭、通信设备、银行、家用电器、IT设备、有色、电气设备等行业活跃。\n",
        "                酒店餐饮、半导体、旅游、多元金融、电器仪表、元器件、通用机械、商业连锁、医药等行业呈现回调。\n",
        "            ''',\n",
        "            [\n",
        "                {\"industry_name\": \"保险\"},\n",
        "                {\"industry_name\": \"煤炭\"},\n",
        "                {\"industry_name\": \"通信设备\"},\n",
        "                {\"industry_name\": \"银行\"},\n",
        "                {\"industry_name\": \"家用电器\"},\n",
        "                {\"industry_name\": \"IT设备\"},\n",
        "                {\"industry_name\": \"有色\"},\n",
        "                {\"industry_name\": \"电气设备\"}\n",
        "            ],\n",
        "        ),\n",
        "        (\n",
        "            '''\n",
        "                截至2023年4月14日收盘，上证指数收涨0.60%，报收3338.15点;深证成指涨0.51%，报收11800.09点;创业板指涨0.93%，报收2428.09点。\n",
        "\n",
        "                沪深两市全天成交额11150亿元，北向资金净买入64.24亿元，其中沪股通净买入21.58亿元，深股通净买入42.66亿元。\n",
        "\n",
        "                根据申万二级行业分类，2023年4月14日共有66个行业上涨，其中，能源金属、半导体、电子化学品、工业金属、小金属涨幅居前;\n",
        "\n",
        "                58个行业下跌，酒店餐饮、旅游及景区、广告营销、互联网电商、数字媒体跌幅居前。\n",
        "\n",
        "                2023年4月14日，沪深A股共有2655家公司上涨，46家公司涨幅超10%(含)，华星创业、耐科装备、联动科技、拓荆科技、经纬辉开涨幅居前;\n",
        "\n",
        "                有2102家公司下跌，4家公司跌幅超10%(含)，登康口腔、值得买、奥飞娱乐、慈文传媒、金桥信息跌幅居前。\n",
        "\n",
        "            ''',\n",
        "            [\n",
        "                {\"industry_name\": \"能源金属\"},\n",
        "                {\"industry_name\": \"半导体\"},\n",
        "                {\"industry_name\": \"电子化学品\"},\n",
        "                {\"industry_name\": \"工业金属\"},\n",
        "                {\"industry_name\": \"小金属\"}\n",
        "            ],\n",
        "        ),\n",
        "        (\n",
        "            '''\n",
        "                传媒、游戏、数据确权、贵金属等板块涨幅居前，工业母机、白酒股等板块飘绿。\n",
        "            ''',\n",
        "            [\n",
        "                {\"industry_name\": \"传媒\"},\n",
        "                {\"industry_name\": \"游戏\"},\n",
        "                {\"industry_name\": \"数字确权\"},\n",
        "                {\"industry_name\": \"贵金属\"}\n",
        "            ],\n",
        "        )\n",
        "    ]\n",
        ")"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 84,
      "metadata": {},
      "outputs": [
        {
          "name": "stdout",
          "output_type": "stream",
          "text": [
            "{\n",
            "  \"stock_market\": [\n",
            "    {\n",
            "      \"industry_name\": \"旅游及景区\"\n",
            "    },\n",
            "    {\n",
            "      \"industry_name\": \"酒店餐饮\"\n",
            "    },\n",
            "    {\n",
            "      \"industry_name\": \"出版\"\n",
            "    },\n",
            "    {\n",
            "      \"industry_name\": \"教育\"\n",
            "    },\n",
            "    {\n",
            "      \"industry_name\": \"互联网电商\"\n",
            "    }\n",
            "  ]\n",
            "}\n"
          ]
        }
      ],
      "source": [
        "text='''\n",
        "    截至2023年4月13日收盘，上证指数收跌0.27%，报收3318.36点;深证成指跌1.21%，报收11739.84点;创业板指跌0.97%，报收2405.76点。\n",
        "\n",
        "    沪深两市全天成交额11369亿元，北向资金净卖出8688万元，其中沪股通净买入20.64亿元，深股通净卖出21.51亿元。\n",
        "\n",
        "    根据申万二级行业分类，2023年4月13日共有46个行业上涨，其中，旅游及景区、酒店餐饮、出版、教育、互联网电商涨幅居前;\n",
        "\n",
        "    78个行业下跌，通信设备、半导体、元件、消费电子、黑色家电跌幅居前。\n",
        "'''\n",
        "extraction_chain = create_extraction_chain(llm, stock_market_schema)\n",
        "output = extraction_chain.predict_and_parse(text=text)['data']\n",
        "\n",
        "json_dump(output)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {},
      "outputs": [],
      "source": []
    }
  ],
  "metadata": {
    "colab": {
      "authorship_tag": "ABX9TyM9UvOpcvHtzs9AC7xR1wdg",
      "include_colab_link": true,
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "version": "3.9.16"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
